###Watercolor regression in R 

is_installed <- function(mypkg) is.element(mypkg, installed.packages()[,1])

load_or_install<-function(package_names)
{

  for(package_name in package_names)
  {
    if(!is_installed(package_name))
    {
       install.packages(package_name,repos="http://lib.stat.cmu.edu/R/CRAN")
    }
    library(package_name,character.only=TRUE,quietly=TRUE,verbose=FALSE)
  }
}


bottom <- function(frame, rows=5) {

frame[(nrow(frame)-rows:nrow(frame)),]

}



DTUniqueBy <- function(data, varvec) {
  data <- as.data.table(data)
  data[!duplicated(data.frame(data[, varvec, with=F]))]
}

panelStructure <- function(group,timevec) {


tt<-rep(timevec,length(group))
tt2 <- as.character(sort(rep(group,length(timevec))))
mat <- cbind("group"=data.frame(tt2),"timevec"=data.frame(tt))
names(mat)<-c("group","timevec")
mat
}
 
preProcess <- function(content, keepnum=FALSE, keepperiod=FALSE,tolower=TRUE) {
if(keepperiod==TRUE) {
content <- gsub("[^[:alnum:].,]", " ", content )
} 
else {
content <- gsub("[^[:alnum:]]", " ", content )
}
content <- gsub("[[:space:]]+", " ", content )
if(tolower==TRUE) {
content <- tolower(content)
}

#content  <- content [!grepl("^(| |\\.)*$", content )]

content  <- gsub("^ *", "  ", content )
if(keepnum==FALSE) {
content  <- gsub("([0-9])*", "", content )
}
content  <- gsub("^-*", "", content )
content  <- gsub(" +", " ", content )
content  <- gsub(" *$", "", content )
#content  <- content[!grepl("^(| |\\.\\,)*$",content )]
content  <- gsub(" *$", "", content )
content  <- gsub("^ *", "", content )

content
}


vwReg<-function(formula, data, title="", B=1000,limitdens=FALSE, shade=TRUE, shade.alpha=.1, CIcol="", spag=FALSE, spag.color="darkblue", mweight=TRUE, show.lm=FALSE, show.points=FALSE,show.median = TRUE, median.col = "white", shape = 21, show.CI=TRUE, method=loess, bw=FALSE, slices=300, palette=colorRampPalette(c("#FFEDA0", "#DD0000"), bias=2)(20), ylim=NULL, quantize = "continuous",  add=FALSE, ...) {
    IV <- all.vars(formula)[2]
    DV <- all.vars(formula)[1]
    data <- na.omit(data[order(data[, IV]), c(IV, DV)])
    
    if (bw == TRUE) {
        palette <- colorRampPalette(c("#EEEEEE", "#999999", "#333333"), bias=2)(20)
    }

    print("Computing boostrapped smoothers ...")
    newx <- data.frame(seq(min(data[, IV]), max(data[, IV]), length=slices))
    colnames(newx) <- IV
    l0.boot <- matrix(NA, nrow=nrow(newx), ncol=B)
    
    l0 <- method(formula, data)
    for (i in 1:B) {
        data2 <- data[sample(nrow(data), replace=TRUE), ]
        data2 <- data2[order(data2[, IV]), ]
        if (class(l0)=="loess") {
            m1 <- method(formula, data2, control = loess.control(surface = "i", statistics="a", trace.hat="a"), ...)
        } else {
            m1 <- method(formula, data2, ...)
        }
        l0.boot[, i] <- predict(m1, newdata=newx)
    }

    # compute median and CI limits of bootstrap
    library(plyr)
    library(reshape2)
    CI.boot <- adply(l0.boot, 1, function(x) quantile(x,  prob=c(.05, .5, .95, pnorm(c(-2,-1.5, -1, 0, 1, 1.5, 2))), na.rm=TRUE))[, -1]
    colnames(CI.boot)[1:10] <- c("LL", "M", "UL", paste0("SD", 1:7))
    CI.boot$x <- newx[, 1]
    CI.boot$width <- CI.boot$UL - CI.boot$LL
    
    # scale the CI width to the range 0 to 1 and flip it (bigger numbers = narrower CI)
    CI.boot$w2 <- (CI.boot$width - min(CI.boot$width))
    CI.boot$w3 <- 1-(CI.boot$w2/max(CI.boot$w2))
	CI.boot$index<-1:slices
    
    # convert bootstrapped spaghettis to long format
    b2 <- melt(l0.boot)
    b2$x <- newx[,1]
    colnames(b2) <- c("index", "B", "value", "x")

    library(ggplot2)
    library(RColorBrewer)
    
    # Construct ggplot
    # All plot elements are constructed as a list, so they can be added to an existing ggplot
    
    # if add == FALSE: provide the basic ggplot object
    p0 <- ggplot(data, aes_string(x=IV, y=DV)) + theme_bw()
    
    # initialize elements with NULL (if they are defined, they are overwritten with something meaningful)
    gg.tiles <- gg.poly <- gg.spag <- gg.median <- gg.CI1 <- gg.CI2 <- gg.lm <- gg.points <- gg.title <- NULL

    if (shade == TRUE) {
        quantize <- match.arg(quantize, c("continuous", "SD"))
        if (quantize == "continuous") {
            print("Computing density estimates for each vertical cut ...")
            flush.console()
        
        	##make ylim dependent on xrange, not global
            if (is.null(ylim)) {
              #  min_value <- min(min(l0.boot, na.rm=TRUE), min(data[, DV], na.rm=TRUE))
              #  max_value <- max(max(l0.boot, na.rm=TRUE), max(data[, DV], na.rm=TRUE))
                 min_value <- min(min(l0.boot, na.rm=TRUE))
                max_value <- max(max(l0.boot, na.rm=TRUE))
               
               ylim <- c(min_value, max_value)
            }

			if(limitdens==FALSE) {
            # vertical cross-sectional density estimate
            d2 <- ddply(b2[, c("x", "value")], .(x), function(df) {
                res <- data.frame(density(df$value, na.rm=TRUE, n=slices, from=ylim[1], to=ylim[2])[c("x", "y")])
                #res <- data.frame(density(df$value, na.rm=TRUE, n=slices)[c("x", "y")])
                colnames(res) <- c("y", "dens")
                return(res)
            }, .progress="text")

			} else {
			 d2 <- ddply(b2[, c("index","x", "value")], .(x), function(df) {
                res <- data.frame(density(df$value, na.rm=TRUE, n=slices, from=CI.boot[CI.boot$index==df[1]$index,]$SD1, to=CI.boot[CI.boot$index==df[1]$index,]$SD7)[c("x", "y")])
                #res <- data.frame(density(df$value, na.rm=TRUE, n=slices)[c("x", "y")])
                colnames(res) <- c("y", "dens")
                return(res)
            }, .progress="text")
			
			
			}
            maxdens <- max(d2$dens)
            mindens <- min(d2$dens)
            d2$dens.scaled <- (d2$dens - mindens)/maxdens   
        
            ## Tile approach
            d2$alpha.factor <- d2$dens.scaled^shade.alpha
            gg.tiles <-  list(geom_tile(data=d2, aes(x=x, y=y, fill=dens.scaled, alpha=alpha.factor)), scale_fill_gradientn("dens.scaled", colours=palette), scale_alpha_continuous(range=c(0.001, 1)))
        }
        if (quantize == "SD") {
            ## Polygon approach
            
            SDs <- melt(CI.boot[, c("x", paste0("SD", 1:7))], id.vars="x")
            count <- 0
            d3 <- data.frame()
            col <- c(1,2,3,3,2,1)
            for (i in 1:6) {
                seg1 <- SDs[SDs$variable == paste0("SD", i), ]
                seg2 <- SDs[SDs$variable == paste0("SD", i+1), ]
                seg <- rbind(seg1, seg2[nrow(seg2):1, ])
                seg$group <- count
                seg$col <- col[i]
                count <- count + 1
                d3 <- rbind(d3, seg)
            }
            
            gg.poly <-  list(geom_polygon(data=d3, aes(x=x, y=value, color=NULL, fill=col, group=group)), scale_fill_gradientn("dens.scaled", colours=palette, values=seq(-1, 3, 1)))
        }
    }
    
    print("Build ggplot figure ...")
    flush.console()
    
    
    if (spag==TRUE) {
        gg.spag <-  geom_path(data=b2, aes(x=x, y=value, group=B), size=0.7, alpha=10/B, color=spag.color)
    }
    
    if (show.median == TRUE) {
        if (mweight == TRUE) {
            gg.median <-  geom_path(data=CI.boot, aes(x=x, y=M, alpha=w3^3), size=.6, linejoin="mitre", color=median.col)
        } else {
            gg.median <-  geom_path(data=CI.boot, aes(x=x, y=M), size = 0.6, linejoin="mitre", color=median.col)
        }
    }
    
    # Confidence limits
    if (show.CI == TRUE) {
     if(CIcol == "") { 
        gg.CI1 <- geom_path(data=CI.boot, aes(x=x, y=UL), size=1, color=palette[1], linetype=2, alpha=.5)
        gg.CI2 <- geom_path(data=CI.boot, aes(x=x, y=LL), size=1, color=palette[1],linetype=2, alpha=.5)
   	} else {
        gg.CI1 <- geom_path(data=CI.boot, aes(x=x, y=UL), size=1, color=CIcol, linetype=2, alpha=.5)
        gg.CI2 <- geom_path(data=CI.boot, aes(x=x, y=LL), size=1, color=CIcol,linetype=2, alpha=.5)   	
   	}
   }
    
    # plain linear regression line
    if (show.lm==TRUE) {gg.lm <- geom_smooth(method="lm", color="darkgreen", se=FALSE)}
    
    if(show.points==TRUE) {
    gg.points <- geom_point(data=data, aes_string(x=IV, y=DV), size=1, shape=shape, fill="white", color="black")        
    }    
    if (title != "") {
        gg.title <- theme(title=title)
    }
    

#    gg.elements <- list(gg.tiles, gg.poly, gg.spag, gg.median, gg.CI1, gg.CI2, gg.lm, gg.points, gg.title, theme(legend.position="none"), coord_cartesian(xlim = c(-0.4641767, 0.4405328),ylim= c(-.45,.4) ) )
 #   gg.elements <- list(gg.tiles, gg.poly, gg.spag, gg.median, gg.CI1, gg.CI2, gg.lm, gg.points, gg.title, theme(legend.position="none"), coord_cartesian(xlim = c(-0.3, 0.3),ylim= c(-.45,.4) ) )
 #   gg.elements <- list(gg.tiles, gg.poly, gg.spag, gg.median, gg.CI1, gg.CI2, gg.lm, gg.points, gg.title, theme(legend.position="none"), coord_cartesian(xlim = c(-0.4, 0.4),ylim= c(-.05,.05) ) )
    
    gg.elements <- list(gg.tiles, gg.poly, gg.spag, gg.median, gg.CI1, gg.CI2, gg.lm, gg.points, gg.title, theme(legend.position="none") )
  
  if (add == FALSE) {
        return(p0 + gg.elements)
    } else {
        return(gg.elements)
    }
}

deMean<-function(var, data, fe) {

res<-felm(attacks ~ (0) |  dd +rpt  | (0) | dd , data=DTA[!is.na(anyviolence)][treatov==1][!is.na(l1yndvi)])$residuals


}

setwd("/Users/thiemo/Dropbox/Research/Thiemo_and_Stephan/Replication/")
load_or_install(c("ggmap","haven","operator.tools","lfe","foreign","data.table"))

###see whether effect even if i exclude residuals

DTA<-data.table(data.frame(read_dta(file="temporary data files/TEMP.dta")))

res1<-felm(lgc_alloc_mo ~ 0 |  ID_lgc + stm  | 0 | ID_lgc, data=DTA[lgc_elec_status==0 & !is.na(lgc_alloc_mo) & !is.na(lgc_elec_status)])$residuals
res2<-felm(anycivilianviolence ~ 0 |  ID_lgc + stm  | 0 | ID_lgc, data=DTA[lgc_elec_status==0& !is.na(lgc_alloc_mo) & !is.na(lgc_elec_status)])$residuals
RESIDS<-data.table(data.frame("res1"=res1,"res2"=res2))


p1c<-vwReg(anycivilianviolence~lgc_alloc_mo, data=data.frame(RESIDS),CIcol="#ffffff", show.lm=TRUE, slices=2000, palette=colorRampPalette(c("white", "#ffcccc","#ff8080","#800000"), bias=5)(20))
p1c+coord_cartesian(xlim = c(-0.25, 0.25), ylim=c(-.04,.04))+ geom_hline(yintercept =0, alpha=.3)+ geom_hline(yintercept =0, alpha=.3)+xlab("Allocation Residuals") + ylab("Conflict Residuals")+ theme(axis.text=element_text(size=16), axis.title=element_text(size=20)) 

ggsave("figures/watercolor-appointed.png", plot = last_plot())



res1<-felm(lgc_alloc_mo ~ 0 |  ID_lgc + stm  | 0 | ID_lgc, data=DTA[lgc_elec_status==1 & !is.na(lgc_alloc_mo) & !is.na(lgc_elec_status)])$residuals
res2<-felm(anycivilianviolence ~ 0 |  ID_lgc + stm  | 0 | ID_lgc, data=DTA[lgc_elec_status==1& !is.na(lgc_alloc_mo) & !is.na(lgc_elec_status)])$residuals
RESIDS<-data.table(data.frame("res1"=res1,"res2"=res2))


p1c<-vwReg(anycivilianviolence~lgc_alloc_mo, data=data.frame(RESIDS),CIcol="#ffffff", show.lm=TRUE, slices=2000, palette=colorRampPalette(c("white", "#ffcccc","#ff8080","#800000"), bias=5)(20))
p1c+coord_cartesian(xlim = c(-0.25, 0.25), ylim=c(-.04,.04))+ geom_hline(yintercept =0, alpha=.3)+ geom_hline(yintercept =0, alpha=.3)+xlab("Allocation Residuals") + ylab("Conflict Residuals")+ theme(axis.text=element_text(size=16), axis.title=element_text(size=20)) 

ggsave("figures/watercolor-elected.png", plot = last_plot())
